#!/usr/bin/perl -w
use strict;
use warnings;
use DBI;
require('config.pl');

# Scrape school profile pages from www.houxue.com (ids 1 through 99999) with
# curl, extract the profile fields with regular expressions and store them in
# the `school` table of the `houxue` MySQL database.

# Return the first capture group of the first pattern in @patterns that
# matches $text, or undef (in scalar context) when no pattern matches.
#
# This replaces the `my $x = $1 if (...)` construct: perlsyn documents a `my`
# declaration combined with a statement modifier as undefined behaviour.
sub first_capture {
	my ($text, @patterns) = @_;
	for my $pattern (@patterns) {
		return $1 if ($text =~ $pattern);
	}
	return;
}

# Connection parameters are read from %ENV (db_host, db_user, db_password).
my $db_conn = DBI->connect("DBI:mysql:database=houxue;host=$ENV{'db_host'}", $ENV{'db_user'}, $ENV{'db_password'})
	or die "Cannot connect to database: $DBI::errstr\n";
$db_conn->do('set names UTF8');

# Scraped text is passed as bind values, never interpolated into the SQL, so
# quotes or backslashes in a page cannot break or alter the statement.
my $replace_sql = 'replace into school(id, name, icon_name, intro, school_name, address, route, phone_number) values(?, ?, ?, ?, ?, ?, ?, ?)';

for my $i (1 .. 99999) {
	my $url = sprintf("http://www.houxue.com/xuexiao/%05d/jieshao.htm", $i);
	# $url is built only from the loop counter, so it is safe inside the
	# single-quoted shell argument.
	my $html = `curl -A 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)' -s -i --speed-time 5 --speed-limit 50000 --connect-timeout 60 -m 300 --max-redirs 0 '$url'`;
	# Backticks yield undef when the command could not be run at all; treat
	# that like an empty response.
	$html = '' if (!defined($html));

	# Skip responses that contain this marker text.
	next if (index($html, '很抱歉') >= 0);

	# Each field is looked up with a primary pattern and, where the original
	# markup has a second variant, a fallback pattern.
	my $icon_name = first_capture($html,
		qr{src="http://www\.houxue\.com/loadimage\.php\?id=\d+" alt="([\d\D]+?)"},
	);
	my $name = first_capture($html,
		qr{<h1>([\d\D]+?)</h1>},
		qr{<h1 class="w1">([\d\D]+?)</h1>},
	);
	my $intro = first_capture($html,
		qr{</table>\s*</span>\s*([\d\D]*?)</li>},
		qr{<p class="tm">([\d\D]*?)</p>},
	);
	my $school_name = first_capture($html,
		qr{<div class="colora">学校名称：</div>\s*<div class="colorb">([\d\D]*?)</div>},
		qr{<li class="colora">学校名称：</li>\s*<li>([\d\D]*?)</li>},
	);
	my $address = first_capture($html,
		qr{<div class="colora">学校地址：</div>\s*<div class="colorb">([\d\D]*?)</div>},
		qr{<li class="colora">学校地址：</li>\s*<li>([\d\D]*?)</li>},
	);
	my $route = first_capture($html,
		qr{<div class="colora">交通线路：</div>\s*<div class="colorb">([\d\D]*?)</div>},
		qr{<li class="colora">交通线路：</li>\s*<li>([\d\D]*?)</li>},
	);
	my $contact = first_capture($html,
		qr{<div class="colora">联系人：</div>\s*<div class="colorb">([\d\D]*?)</div>},
		qr{<li class="colora">联系人：</li>\s*<li>([\d\D]*?)</li>},
	);
	my $phone_number = first_capture($html,
		qr{style="font-size:16px;padding-top:2px;">([\d\D]*?)</span>},
		qr{<li class="colora">联系电话：</li>\s*<li class="tel1">([\d\D]*?)</li>},
	);

	# Missing fields become empty strings so that printing and storing them
	# does not raise "uninitialized value" warnings. $name is left alone
	# because its absence decides whether the row is stored at all, and
	# $intro is left alone so that a missing intro is still stored as NULL.
	for my $field ($icon_name, $school_name, $address, $route, $contact, $phone_number) {
		$field = '' if (!defined($field));
	}

	# Progress line; printed even for pages that end up being skipped below.
	my $printed_name = defined($name) ? $name : '';
	print "$i $icon_name\t$printed_name\t$school_name\t$address\t$route\t$contact\t$phone_number\n";

	# A page without a recognisable name is not stored.
	next if (!defined($name));

	# $contact is only printed above; the column list of this statement has
	# no contact column.
	$db_conn->do($replace_sql, undef,
		$i, $name, $icon_name, $intro, $school_name, $address, $route, $phone_number);
}
